*Session setup: disable output paging, then start from an empty dataset and
*empty matrix memory before raising the matrix size limit.
set more off
clear
clear matrix
set matsize 1000


*Load the FARE firm-level dataset for the year 2008.
use ".........\fare2008.dta", clear

*Attach the correspondence between NACE Rev. 2 codes (ape_08) in FARE and the
*aggregate industry breakdown from input-output tables based on NACE Rev. 1.1.
*m:1 = many firms in memory may share one ape_08 code; the using file must
*hold exactly one row per code (merge stops with an error otherwise).
sort ape_08
merge m:1 ape_08 using ".....\Done2.dta"

*Drop observations that cannot be assigned to an aggregate industry
*(no matching code in the correspondence table).
drop if missing(aggregate_ind)

*Drop observations with missing or zero employment (number of employees at
*December 31st). missing() is used instead of a comparison with "." so that
*extended missing values (.a-.z) are removed as well; the equality test for
*zero uses the documented "==" operator.
drop if missing(empl)
drop if empl==0
rename empl empfte


*In what follows we compute and subtract "R&D related workers", using
*aggregate profits as a share of aggregate turnover within each aggregate
*industry.
rename caht turnover

*Industry totals are stored as double: summing many firm-level amounts in
*the default float type (about 7 significant digits) loses precision.
*total() is the documented name of the egen function formerly called sum().
egen double turn_aggregate_ind=total(turnover), by(aggregate_ind)
egen double prof_aggregate_ind=total(profits), by(aggregate_ind)

*Industry-level profit share of turnover (missing when industry turnover is 0).
gen double cut=prof_aggregate_ind/turn_aggregate_ind
su cut, de

*NOTE(review): cut is a ratio while empfte is a head count, so this subtracts
*the ratio itself from each firm's employment rather than a number of workers.
*Left exactly as in the original - confirm this is the intended adjustment.
replace empfte=empfte-cut

*Eliminate firms ending up, after accounting for "R&D related workers", with
*zero or negative employment.
drop if empfte<=0


*Trim both tails of the employment distribution. The two cutoffs (1.5th and
*98.5th percentiles of empfte over the whole sample) are computed before any
*observation is removed; observations at or beyond either cutoff are dropped.
egen trim_lo=pctile(empfte), p(1.5)
egen trim_hi=pctile(empfte), p(98.5)

drop if empfte<=trim_lo | empfte>=trim_hi

*The cutoffs are scratch variables only.
drop trim_lo trim_hi


*Finally we compute the industry-level statistics. Every intermediate is
*stored as double so that the double-precision moments below are not built
*on float-rounded inputs.

*Log employment and each firm's squared share of industry employment
*(the squared shares are summed by industry in the collapse below).
gen double ln_empfte=log(empfte)
egen double tot_emp_ind=total(empfte), by(aggregate_ind)
gen double mark_share_2=(empfte/tot_emp_ind)^2

*Within-industry standard deviation and mean of log employment.
egen double SD_log_emp=sd(ln_empfte), by(aggregate_ind)
egen double Mean_log_emp=mean(ln_empfte), by(aggregate_ind)

*Fourth power of the deviation from the industry mean, its industry average,
*and the fourth power of the industry standard deviation.
gen double dev_mean_4=(ln_empfte-Mean_log_emp)^4
egen double Mean_dev_mean_4=mean(dev_mean_4), by(aggregate_ind)
gen double Sigma_4=SD_log_emp^4

*Counter that sums to the number of firms per industry.
gen n_firms=1

*One row per aggregate industry: firm count and sum of squared shares are
*summed; the remaining statistics are constant within industry, so their
*mean simply carries the value through.
collapse (sum) n_firms mark_share_2 (mean) SD_log_emp Sigma_4 Mean_dev_mean_4, by(aggregate_ind)

save "....\bs_data_all_fin_robust_profits.dta", replace
*NOTE(review): export excel writes no header row unless firstrow(variables)
*is specified, and the file name ends in ".dta.xls"; both are kept as in the
*original in case downstream steps rely on them - confirm.
export excel using ".....\bs_data_all_fin_robust_profits.dta.xls", replace